set.seed(1)
#if (!requireNamespace("BiocManager", quietly = TRUE))   
#  install.packages("BiocManager",repos = "http://cran.us.r-project.org")  
#BiocManager::install("fgsea")
#BiocManager::install("limma")
#BiocManager::install("Biobase")
#install.packages("ggplot2")
#install.packages("igraph")
library(limma)
library(Biobase)
library(ggplot2)
library(igraph)
library(data.table)
library(fgsea)
library(readr)
library(biomaRt)
library(edgeR)

##############################
# Number of mRNA
n_mRNA = 19504

bonobo = data.frame(fread("/home/esaha/BONOBO/network/miRNA_mRNA_breastCancer/indegree_miRNA.txt"))
genes = bonobo$V1

indegree = data.frame(bonobo[1:n_mRNA,-1])
rownames(indegree) = genes[1:n_mRNA]

# Get phenotypic data
phenotype = read.csv("~/BONOBO/data/miRNA_mRNA_breastCancer/GSE19783_phenotype.txt", sep="")
phenotype = phenotype[match(colnames(indegree), rownames(phenotype)),]
subtypes = phenotype$breast.cancer.subtype.ch1[match(colnames(indegree), rownames(phenotype))]
subtypes[is.na(subtypes)] = "not classified"
#subtypes[subtypes == "not classified"] = "unknown"

# limma model to identify difference between each pair of subtypes
subtypes = factor(subtypes)

# Compute pathway score
# Get KEGG pathways
pathways <- gmtPathways("/home/ubuntu/c2.cp.kegg.v7.1.symbols.gmt")
pathScore = lapply(pathways, function(x){colMeans(indegree[which(rownames(indegree) %in% x),])})

# Cox proportional Hazard Model
# Create survival object

time <-  phenotype$disease.free.survival.time..months..ch1
time <- time/12
status <- unlist(lapply(strsplit(phenotype$death.status.ch1, split = " ", fixed = T), function(x){x[1]}))
status <- sub("Alive", 0, status)
status <- sub("Dead", 1, status)
status <- as.numeric(status)
## Load survival package
library(survival)
SurvObj = Surv(time, status)

# Table for how many significant genes are differentially coexpressed between pair of subtypes
table_zStat = matrix(0, length(pathScore), length(levels(subtypes)))
rownames(table_zStat) = names(pathScore)
colnames(table_zStat) = levels(subtypes)
table_pval = table_zStat
for (i in 1:ncol(table_zStat)){
  subtypes0 = relevel(subtypes, ref = levels(subtypes)[i])
  for (j in 1:nrow(table_zStat)){
    score = as.numeric(pathScore[[j]])
    cox_model <- coxph(SurvObj ~ score*subtypes0)
    coeff_table = summary(cox_model)$coefficients
    table_zStat[j,i] = coeff_table["score", 4]
    table_pval[j,i] = coeff_table["score", 5]
  }
}
head(table_zStat)
head(table_pval)

sig_paths = rownames(table_pval)[apply(table_pval, MARGIN = 1, function(x){min(x[-length(x)])<0.05})]
sig_path_limma = read.delim("/home/esaha/BONOBO/plots/mRNA_miRNA_breastCancer/GSE19783_miRNA_sigPathways_limma.txt")
sig_paths = intersect(sig_paths, unlist(sig_path_limma))
tab_subset = table_zStat[match(sig_paths, rownames(table_zStat)),-ncol(table_zStat)]
rownames(tab_subset) = stringr::str_to_title(lapply(strsplit(rownames(tab_subset), split = "_"), function(x){paste(x[-1], collapse = " ")}))

# A matrix of characters (stars) to indicate which results are significant
lab = table_pval
lab[which(table_pval < 0.05)] = "*"
lab[which(table_pval < 0.01)] = "**"
lab[which(table_pval >= 0.05)] = ""
lab_subset = lab[match(sig_paths, rownames(table_pval)),-ncol(table_zStat)]

# Make heatmap comparing all groups

library(ggplot2)
library(gplots)
library(RColorBrewer)

mycol2 <- colorRampPalette(rev(brewer.pal(11,"RdBu")))(50)
pdf("/home/esaha/BONOBO/plots/mRNA_miRNA_breastCancer/GSE19783_pathway_survival_miRNAindegree.pdf",h=25,w=16)
heatmap.2(as.matrix(tab_subset),density.info="none",trace="none",col=mycol2,symbreaks=T,symkey=T, 
          cexRow=1.5, cexCol=1.5, srtCol = 45, keysize=0.5, mar=c(20,50), key.title=NULL, key.xlab="NES", cellnote=lab_subset, notecol="black", notecex=5)
dev.off()

##########################
# Pathways interacting with TP53 mutation
tp53 = factor(phenotype$tp53.mutation.status.ch1)
her2 = factor(phenotype$her2..fish..status.ch1)
#mutation_status = factor(phenotype$estrogen.receptor.status.ch1)

mutation_status = tp53

# Table for how many significant genes are differentially coexpressed between pair of subtypes
table_zStat = matrix(0, length(pathScore), length(levels(mutation_status)))
rownames(table_zStat) = names(pathScore)
colnames(table_zStat) = levels(mutation_status)
table_pval = table_zStat
for (i in 1:ncol(table_zStat)){
  mutation_status0 = relevel(mutation_status, ref = levels(mutation_status)[i])
  for (j in 1:nrow(table_zStat)){
    score = as.numeric(pathScore[[j]])
    cox_model <- coxph(SurvObj ~ score*mutation_status0 + subtypes)
    coeff_table = summary(cox_model)$coefficients
    table_zStat[j,i] = coeff_table["score", 4]
    table_pval[j,i] = coeff_table["score", 5]
  }
}
head(table_zStat)
head(table_pval)

sig_paths = rownames(table_pval)[apply(table_pval, MARGIN = 1, function(x){min(x[-length(x)])<0.1})]
sig_path_limma = read.delim("/home/esaha/BONOBO/plots/mRNA_miRNA_breastCancer/GSE19783_miRNA_sigPathways_limma.txt")
sig_paths = intersect(sig_paths, unlist(sig_path_limma))
tab_subset = table_zStat[match(sig_paths, rownames(table_zStat)),]
rownames(tab_subset) = stringr::str_to_title(lapply(strsplit(rownames(tab_subset), split = "_"), function(x){paste(x[-1], collapse = " ")}))

# A matrix of characters (stars) to indicate which results are significant
lab = table_pval
lab[which(table_pval < 0.1)] = ".."
lab[which(table_pval < 0.05)] = "*"
lab[which(table_pval >= 0.1)] = ""
lab_subset = lab[match(sig_paths, rownames(table_pval)),]

# Make heatmap comparing all groups

library(ggplot2)
library(gplots)
library(RColorBrewer)

mycol2 <- colorRampPalette(rev(brewer.pal(11,"RdBu")))(50)
pdf("/home/esaha/BONOBO/plots/mRNA_miRNA_breastCancer/GSE19783_pathway_survival_miRNAindegree_tp53.pdf",h=25,w=16)
heatmap.2(as.matrix(tab_subset),density.info="none",trace="none",col=mycol2,symbreaks=T,symkey=T, 
          cexRow=1.5, cexCol=1.5, srtCol = 45, keysize=0.5, mar=c(100,50), key.title=NULL, key.xlab="NES", cellnote=lab_subset, notecol="black", notecex=5)
dev.off()
